knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ─ Attaching packages ────────────────────────── tidyverse 1.3.0 ─
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ─ Conflicts ─────────────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(gapminder)
library(socviz)
library(ggrepel)
glimpse(gapminder)
## Rows: 1,704
## Columns: 6
## $ country <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Afghan…
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia…
## $ year <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997…
## $ lifeExp <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40…
## $ pop <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, …
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134…
p <- ggplot(data = gapminder,
mapping = aes(x = log(gdpPercap), y = lifeExp))
# 線形回帰モデルとロバスト線形回帰モデル
p + geom_point(alpha = 0.1) +
geom_smooth(color = "tomato", fill = "tomato", method = MASS::rlm) +
geom_smooth(color = "steelblue", fill = "steelblue", method = "lm")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
# 多項式回帰モデル
p + geom_point(alpha = 0.1) +
geom_smooth(color = "red", fill = "red", method = "lm", size = 1.2,
formula = y ~ splines::bs(x, df = 3))
# 分位点回帰モデル
p + geom_point(alpha = 0.1) +
geom_quantile(color = "red", size = 1.2, method = "rqss",
lambda = 1, quantils = c(0.20, 0.5, 0.85))
## Warning: Ignoring unknown parameters: quantils
## Smoothing formula not specified. Using: y ~ qss(x, lambda = 1)
複数の回帰直線をそれぞれ別の設定で重ねることは可能. しかしながら,凡例はデータの内部で結びついているわけではないから.
# 色の獲得
model_colors <- RColorBrewer::brewer.pal(3, "Set1")
model_colors
## [1] "#E41A1C" "#377EB8" "#4DAF4A"
p0 <- ggplot(data = gapminder,
mapping = aes(x = log(gdpPercap), y = lifeExp))
p1 <- p0 +
geom_point(alpha = 0.2) +
geom_smooth(method = "lm", aes(color = "OLS", fill = "OLS")) +
geom_smooth(method = "lm", formula = y ~ splines::bs(x, df = 3),
aes(color = "Cubic Spline", fill = "Cubic Spline")) +
geom_smooth(method = "loess", aes(color = "loess", fill = "loess"))
p1 + scale_color_manual(name = "Models", values = model_colors) +
scale_fill_manual(name = "Models", values = model_colors) +
theme(legend.position = "top")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
本書では,データにあてはまるモデルを選択し,ggplotを使って情報を抽出,可視化する方法のみを述べる
Rでは常にオブジェクトを操作している. オブジェクトは名前付きの部分構造(数値やベクトル,formulaなど)を持っているので,簡単にアクセスできる
gapminder
## # A tibble: 1,704 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
## 7 Afghanistan Asia 1982 39.9 12881816 978.
## 8 Afghanistan Asia 1987 40.8 13867957 852.
## 9 Afghanistan Asia 1992 41.7 16317921 649.
## 10 Afghanistan Asia 1997 41.8 22227415 635.
## # … with 1,694 more rows
str(gapminder)
## tibble [1,704 × 6] (S3: tbl_df/tbl/data.frame)
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ year : int [1:1704] 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num [1:1704] 28.8 30.3 32 34 36.1 ...
## $ pop : int [1:1704] 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
## $ gdpPercap: num [1:1704] 779 821 853 836 740 ...
Rの統計モデルオブジェクトも内部にモデルの結果を格納しているが,複雑である
# 国と調査年の間に構造的な関係がある場合,線形モデルは正しくない?
out <- lm(formula = lifeExp ~ gdpPercap + pop + continent,
data = gapminder)
# formula記法
# 従属変数 ~ 独立変数
# summary()関数を使うことでモデルの概要がわかる
summary(out)
##
## Call:
## lm(formula = lifeExp ~ gdpPercap + pop + continent, data = gapminder)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49.161 -4.486 0.297 5.110 25.175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.781e+01 3.395e-01 140.819 < 2e-16 ***
## gdpPercap 4.495e-04 2.346e-05 19.158 < 2e-16 ***
## pop 6.570e-09 1.975e-09 3.326 0.000901 ***
## continentAmericas 1.348e+01 6.000e-01 22.458 < 2e-16 ***
## continentAsia 8.193e+00 5.712e-01 14.342 < 2e-16 ***
## continentEurope 1.747e+01 6.246e-01 27.973 < 2e-16 ***
## continentOceania 1.808e+01 1.782e+00 10.146 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.365 on 1697 degrees of freedom
## Multiple R-squared: 0.5821, Adjusted R-squared: 0.5806
## F-statistic: 393.9 on 6 and 1697 DF, p-value: < 2.2e-16
# str()関数を使うことでモデルオブジェクトの構造がわかる
str(out)
## List of 13
## $ coefficients : Named num [1:7] 4.78e+01 4.50e-04 6.57e-09 1.35e+01 8.19 ...
## ..- attr(*, "names")= chr [1:7] "(Intercept)" "gdpPercap" "pop" "continentAmericas" ...
## $ residuals : Named num [1:1704] -27.6 -26.1 -24.5 -22.4 -20.3 ...
## ..- attr(*, "names")= chr [1:1704] "1" "2" "3" "4" ...
## $ effects : Named num [1:1704] -2455.1 311.1 42.6 101.1 -17.2 ...
## ..- attr(*, "names")= chr [1:1704] "(Intercept)" "gdpPercap" "pop" "continentAmericas" ...
## $ rank : int 7
## $ fitted.values: Named num [1:1704] 56.4 56.4 56.5 56.5 56.4 ...
## ..- attr(*, "names")= chr [1:1704] "1" "2" "3" "4" ...
## $ assign : int [1:7] 0 1 2 3 3 3 3
## $ qr :List of 5
## ..$ qr : num [1:1704, 1:7] -41.2795 0.0242 0.0242 0.0242 0.0242 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:1704] "1" "2" "3" "4" ...
## .. .. ..$ : chr [1:7] "(Intercept)" "gdpPercap" "pop" "continentAmericas" ...
## .. ..- attr(*, "assign")= int [1:7] 0 1 2 3 3 3 3
## .. ..- attr(*, "contrasts")=List of 1
## .. .. ..$ continent: chr "contr.treatment"
## ..$ qraux: num [1:7] 1.02 1.02 1 1.01 1.04 ...
## ..$ pivot: int [1:7] 1 2 3 4 5 6 7
## ..$ tol : num 1e-07
## ..$ rank : int 7
## ..- attr(*, "class")= chr "qr"
## $ df.residual : int 1697
## $ contrasts :List of 1
## ..$ continent: chr "contr.treatment"
## $ xlevels :List of 1
## ..$ continent: chr [1:5] "Africa" "Americas" "Asia" "Europe" ...
## $ call : language lm(formula = lifeExp ~ gdpPercap + pop + continent, data = gapminder)
## $ terms :Classes 'terms', 'formula' language lifeExp ~ gdpPercap + pop + continent
## .. ..- attr(*, "variables")= language list(lifeExp, gdpPercap, pop, continent)
## .. ..- attr(*, "factors")= int [1:4, 1:3] 0 1 0 0 0 0 1 0 0 0 ...
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:4] "lifeExp" "gdpPercap" "pop" "continent"
## .. .. .. ..$ : chr [1:3] "gdpPercap" "pop" "continent"
## .. ..- attr(*, "term.labels")= chr [1:3] "gdpPercap" "pop" "continent"
## .. ..- attr(*, "order")= int [1:3] 1 1 1
## .. ..- attr(*, "intercept")= int 1
## .. ..- attr(*, "response")= int 1
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. ..- attr(*, "predvars")= language list(lifeExp, gdpPercap, pop, continent)
## .. ..- attr(*, "dataClasses")= Named chr [1:4] "numeric" "numeric" "numeric" "factor"
## .. .. ..- attr(*, "names")= chr [1:4] "lifeExp" "gdpPercap" "pop" "continent"
## $ model :'data.frame': 1704 obs. of 4 variables:
## ..$ lifeExp : num [1:1704] 28.8 30.3 32 34 36.1 ...
## ..$ gdpPercap: num [1:1704] 779 821 853 836 740 ...
## ..$ pop : int [1:1704] 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
## ..$ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## ..- attr(*, "terms")=Classes 'terms', 'formula' language lifeExp ~ gdpPercap + pop + continent
## .. .. ..- attr(*, "variables")= language list(lifeExp, gdpPercap, pop, continent)
## .. .. ..- attr(*, "factors")= int [1:4, 1:3] 0 1 0 0 0 0 1 0 0 0 ...
## .. .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. .. ..$ : chr [1:4] "lifeExp" "gdpPercap" "pop" "continent"
## .. .. .. .. ..$ : chr [1:3] "gdpPercap" "pop" "continent"
## .. .. ..- attr(*, "term.labels")= chr [1:3] "gdpPercap" "pop" "continent"
## .. .. ..- attr(*, "order")= int [1:3] 1 1 1
## .. .. ..- attr(*, "intercept")= int 1
## .. .. ..- attr(*, "response")= int 1
## .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. .. ..- attr(*, "predvars")= language list(lifeExp, gdpPercap, pop, continent)
## .. .. ..- attr(*, "dataClasses")= Named chr [1:4] "numeric" "numeric" "numeric" "factor"
## .. .. .. ..- attr(*, "names")= chr [1:4] "lifeExp" "gdpPercap" "pop" "continent"
## - attr(*, "class")= chr "lm"
モデルの結果を効果的に可視化するのは難しい. なぜならばモデルの結果を示す際にはデータの背景知識に基づく知見とモデルの解釈の両方が必要になるから
モデルの推定値の図示はモデルの適切な推定と深く関わっているため,統計をしっかり学ぶ以外に作図のスキルをあげる方法はありません. モデルの中身を理解しないまま可視化するのはやめる
モデルの可視化を適切に行えれば,分析で得ようとしている問題に対して実質的に意味があり,かつ直接結果を解釈できる図が得られる. 解釈可能な結果を示したいなら,読み手が容易に理解できる尺度を使う必要がある.
結果の不確実性や信頼度をはっきりと可視化する図を作るのは難しい. モデル推定にはさまざまな指標(精度,信頼区間,信用区間,有意性など)が用いられるが, これらの情報が本来持っている情報量以上に過信する傾向があり, 解釈を誤ったり,結果から言えること以上のことを言ってしまいがち
多変量モデルの結果の図示. - 回帰係数の氷河実際にどういう意味なのかを,有意性や傾きの大きさを示し,重要度で分類する - 単純なモデルの回帰係数ではなく,関心のある範囲におけるいくつかの変数の予測値を示すこと - 元データの上から,モデルの推定値を元データに組み合わせて,可視化する
Rで用いられる関数はさまざまなオブジェクトに対して汎用的に使うことができる.
predict()関数: モデルオブジェクトから予測値を生成する関数
# モデルの作成
out <- lm(formula = lifeExp ~ gdpPercap + pop + continent,
data = gapminder)
# 予測するための新しいデータの作成
min_gdp <- min(gapminder$gdpPercap)
max_gdp <- max(gapminder$gdpPercap)
med_pop <- median(gapminder$pop)
# 他らしいデータの作成
pred_df <- expand.grid(gdpPercap = (seq(from = min_gdp,
to = max_gdp,
length.out = 100)),
pop = med_pop,
continent = c("Africa", "Americas",
"Asia", "Europe", "Oceania"))
dim(pred_df)
## [1] 500 3
head(pred_df)
## gdpPercap pop continent
## 1 241.1659 7023596 Africa
## 2 1385.4282 7023596 Africa
## 3 2529.6905 7023596 Africa
## 4 3673.9528 7023596 Africa
## 5 4818.2150 7023596 Africa
## 6 5962.4773 7023596 Africa
# 予測
pred_out <- predict(object = out,
newdata = pred_df,
interval = "predict")
head(pred_out)
## fit lwr upr
## 1 47.96863 31.54775 64.38951
## 2 48.48298 32.06231 64.90365
## 3 48.99733 32.57670 65.41797
## 4 49.51169 33.09092 65.93245
## 5 50.02604 33.60497 66.44711
## 6 50.54039 34.11885 66.96193
# 予測のためのデータと予測結果を結合させる
pred_df <- cbind(pred_df, pred_out)
pred_df
## gdpPercap pop continent fit lwr upr
## 1 241.1659 7023596 Africa 47.96863 31.54775 64.38951
## 2 1385.4282 7023596 Africa 48.48298 32.06231 64.90365
## 3 2529.6905 7023596 Africa 48.99733 32.57670 65.41797
## 4 3673.9528 7023596 Africa 49.51169 33.09092 65.93245
## 5 4818.2150 7023596 Africa 50.02604 33.60497 66.44711
## 6 5962.4773 7023596 Africa 50.54039 34.11885 66.96193
## 7 7106.7396 7023596 Africa 51.05474 34.63256 67.47692
## 8 8251.0019 7023596 Africa 51.56910 35.14611 67.99208
## 9 9395.2642 7023596 Africa 52.08345 35.65949 68.50741
## 10 10539.5265 7023596 Africa 52.59780 36.17269 69.02291
## 11 11683.7888 7023596 Africa 53.11215 36.68573 69.53857
## 12 12828.0511 7023596 Africa 53.62651 37.19860 70.05441
## 13 13972.3134 7023596 Africa 54.14086 37.71130 70.57041
## 14 15116.5757 7023596 Africa 54.65521 38.22384 71.08658
## 15 16260.8380 7023596 Africa 55.16956 38.73620 71.60292
## 16 17405.1003 7023596 Africa 55.68391 39.24840 72.11943
## 17 18549.3626 7023596 Africa 56.19827 39.76042 72.63611
## 18 19693.6249 7023596 Africa 56.71262 40.27228 73.15296
## 19 20837.8872 7023596 Africa 57.22697 40.78397 73.66997
## 20 21982.1494 7023596 Africa 57.74132 41.29550 74.18715
## 21 23126.4117 7023596 Africa 58.25568 41.80685 74.70450
## 22 24270.6740 7023596 Africa 58.77003 42.31804 75.22202
## 23 25414.9363 7023596 Africa 59.28438 42.82906 75.73971
## 24 26559.1986 7023596 Africa 59.79873 43.33991 76.25756
## 25 27703.4609 7023596 Africa 60.31309 43.85059 76.77558
## 26 28847.7232 7023596 Africa 60.82744 44.36111 77.29377
## 27 29991.9855 7023596 Africa 61.34179 44.87145 77.81213
## 28 31136.2478 7023596 Africa 61.85614 45.38163 78.33066
## 29 32280.5101 7023596 Africa 62.37050 45.89165 78.84935
## 30 33424.7724 7023596 Africa 62.88485 46.40149 79.36821
## 31 34569.0347 7023596 Africa 63.39920 46.91117 79.88723
## 32 35713.2970 7023596 Africa 63.91355 47.42069 80.40642
## 33 36857.5593 7023596 Africa 64.42791 47.93003 80.92578
## 34 38001.8216 7023596 Africa 64.94226 48.43921 81.44531
## 35 39146.0838 7023596 Africa 65.45661 48.94822 81.96500
## 36 40290.3461 7023596 Africa 65.97096 49.45707 82.48486
## 37 41434.6084 7023596 Africa 66.48532 49.96575 83.00488
## 38 42578.8707 7023596 Africa 66.99967 50.47427 83.52507
## 39 43723.1330 7023596 Africa 67.51402 50.98262 84.04543
## 40 44867.3953 7023596 Africa 68.02837 51.49080 84.56595
## 41 46011.6576 7023596 Africa 68.54273 51.99882 85.08664
## 42 47155.9199 7023596 Africa 69.05708 52.50667 85.60749
## 43 48300.1822 7023596 Africa 69.57143 53.01436 86.12850
## 44 49444.4445 7023596 Africa 70.08578 53.52188 86.64968
## 45 50588.7068 7023596 Africa 70.60014 54.02924 87.17103
## 46 51732.9691 7023596 Africa 71.11449 54.53644 87.69254
## 47 52877.2314 7023596 Africa 71.62884 55.04347 88.21421
## 48 54021.4937 7023596 Africa 72.14319 55.55034 88.73605
## 49 55165.7559 7023596 Africa 72.65755 56.05704 89.25805
## 50 56310.0182 7023596 Africa 73.17190 56.56358 89.78022
## 51 57454.2805 7023596 Africa 73.68625 57.06996 90.30255
## 52 58598.5428 7023596 Africa 74.20060 57.57617 90.82504
## 53 59742.8051 7023596 Africa 74.71496 58.08222 91.34769
## 54 60887.0674 7023596 Africa 75.22931 58.58811 91.87051
## 55 62031.3297 7023596 Africa 75.74366 59.09384 92.39349
## 56 63175.5920 7023596 Africa 76.25801 59.59940 92.91663
## 57 64319.8543 7023596 Africa 76.77237 60.10480 93.43993
## 58 65464.1166 7023596 Africa 77.28672 60.61004 93.96340
## 59 66608.3789 7023596 Africa 77.80107 61.11512 94.48702
## 60 67752.6412 7023596 Africa 78.31542 61.62004 95.01081
## 61 68896.9035 7023596 Africa 78.82978 62.12480 95.53475
## 62 70041.1658 7023596 Africa 79.34413 62.62940 96.05886
## 63 71185.4281 7023596 Africa 79.85848 63.13384 96.58313
## 64 72329.6903 7023596 Africa 80.37283 63.63811 97.10756
## 65 73473.9526 7023596 Africa 80.88719 64.14223 97.63214
## 66 74618.2149 7023596 Africa 81.40154 64.64619 98.15689
## 67 75762.4772 7023596 Africa 81.91589 65.14999 98.68179
## 68 76906.7395 7023596 Africa 82.43024 65.65363 99.20686
## 69 78051.0018 7023596 Africa 82.94460 66.15712 99.73208
## 70 79195.2641 7023596 Africa 83.45895 66.66044 100.25746
## 71 80339.5264 7023596 Africa 83.97330 67.16361 100.78300
## 72 81483.7887 7023596 Africa 84.48765 67.66662 101.30869
## 73 82628.0510 7023596 Africa 85.00201 68.16947 101.83454
## 74 83772.3133 7023596 Africa 85.51636 68.67217 102.36055
## 75 84916.5756 7023596 Africa 86.03071 69.17471 102.88672
## 76 86060.8379 7023596 Africa 86.54506 69.67709 103.41304
## 77 87205.1002 7023596 Africa 87.05942 70.17932 103.93952
## 78 88349.3625 7023596 Africa 87.57377 70.68139 104.46615
## 79 89493.6247 7023596 Africa 88.08812 71.18331 104.99294
## 80 90637.8870 7023596 Africa 88.60247 71.68507 105.51988
## 81 91782.1493 7023596 Africa 89.11683 72.18668 106.04698
## 82 92926.4116 7023596 Africa 89.63118 72.68813 106.57423
## 83 94070.6739 7023596 Africa 90.14553 73.18943 107.10163
## 84 95214.9362 7023596 Africa 90.65988 73.69058 107.62919
## 85 96359.1985 7023596 Africa 91.17424 74.19157 108.15690
## 86 97503.4608 7023596 Africa 91.68859 74.69241 108.68477
## 87 98647.7231 7023596 Africa 92.20294 75.19310 109.21278
## 88 99791.9854 7023596 Africa 92.71729 75.69364 109.74095
## 89 100936.2477 7023596 Africa 93.23165 76.19402 110.26927
## 90 102080.5100 7023596 Africa 93.74600 76.69426 110.79774
## 91 103224.7723 7023596 Africa 94.26035 77.19434 111.32636
## 92 104369.0346 7023596 Africa 94.77470 77.69427 111.85513
## 93 105513.2968 7023596 Africa 95.28906 78.19406 112.38406
## 94 106657.5591 7023596 Africa 95.80341 78.69369 112.91313
## 95 107801.8214 7023596 Africa 96.31776 79.19317 113.44235
## 96 108946.0837 7023596 Africa 96.83211 79.69251 113.97172
## 97 110090.3460 7023596 Africa 97.34647 80.19170 114.50124
## 98 111234.6083 7023596 Africa 97.86082 80.69073 115.03090
## 99 112378.8706 7023596 Africa 98.37517 81.18962 115.56072
## 100 113523.1329 7023596 Africa 98.88952 81.68837 116.09068
## 101 241.1659 7023596 Americas 61.44457 45.00649 77.88265
## 102 1385.4282 7023596 Americas 61.95892 45.52179 78.39606
## 103 2529.6905 7023596 Americas 62.47328 46.03691 78.90964
## 104 3673.9528 7023596 Americas 62.98763 46.55187 79.42338
## 105 4818.2150 7023596 Americas 63.50198 47.06666 79.93730
## 106 5962.4773 7023596 Americas 64.01633 47.58129 80.45138
## 107 7106.7396 7023596 Americas 64.53069 48.09574 80.96563
## 108 8251.0019 7023596 Americas 65.04504 48.61002 81.48005
## 109 9395.2642 7023596 Americas 65.55939 49.12414 81.99464
## 110 10539.5265 7023596 Americas 66.07374 49.63809 82.50940
## 111 11683.7888 7023596 Americas 66.58810 50.15186 83.02433
## 112 12828.0511 7023596 Americas 67.10245 50.66547 83.53942
## 113 13972.3134 7023596 Americas 67.61680 51.17891 84.05469
## 114 15116.5757 7023596 Americas 68.13115 51.69219 84.57012
## 115 16260.8380 7023596 Americas 68.64551 52.20529 85.08572
## 116 17405.1003 7023596 Americas 69.15986 52.71822 85.60149
## 117 18549.3626 7023596 Americas 69.67421 53.23099 86.11743
## 118 19693.6249 7023596 Americas 70.18856 53.74359 86.63354
## 119 20837.8872 7023596 Americas 70.70292 54.25602 87.14981
## 120 21982.1494 7023596 Americas 71.21727 54.76828 87.66626
## 121 23126.4117 7023596 Americas 71.73162 55.28037 88.18287
## 122 24270.6740 7023596 Americas 72.24597 55.79230 88.69965
## 123 25414.9363 7023596 Americas 72.76033 56.30405 89.21660
## 124 26559.1986 7023596 Americas 73.27468 56.81564 89.73371
## 125 27703.4609 7023596 Americas 73.78903 57.32706 90.25100
## 126 28847.7232 7023596 Americas 74.30338 57.83832 90.76845
## 127 29991.9855 7023596 Americas 74.81774 58.34940 91.28607
## 128 31136.2478 7023596 Americas 75.33209 58.86032 91.80386
## 129 32280.5101 7023596 Americas 75.84644 59.37107 92.32181
## 130 33424.7724 7023596 Americas 76.36079 59.88165 92.83994
## 131 34569.0347 7023596 Americas 76.87515 60.39206 93.35823
## 132 35713.2970 7023596 Americas 77.38950 60.90231 93.87669
## 133 36857.5593 7023596 Americas 77.90385 61.41239 94.39531
## 134 38001.8216 7023596 Americas 78.41820 61.92230 94.91410
## 135 39146.0838 7023596 Americas 78.93256 62.43205 95.43306
## 136 40290.3461 7023596 Americas 79.44691 62.94163 95.95219
## 137 41434.6084 7023596 Americas 79.96126 63.45104 96.47148
## 138 42578.8707 7023596 Americas 80.47561 63.96029 96.99094
## 139 43723.1330 7023596 Americas 80.98997 64.46937 97.51056
## 140 44867.3953 7023596 Americas 81.50432 64.97829 98.03035
## 141 46011.6576 7023596 Americas 82.01867 65.48703 98.55031
## 142 47155.9199 7023596 Americas 82.53302 65.99562 99.07043
## 143 48300.1822 7023596 Americas 83.04738 66.50403 99.59072
## 144 49444.4445 7023596 Americas 83.56173 67.01228 100.11117
## 145 50588.7068 7023596 Americas 84.07608 67.52037 100.63179
## 146 51732.9691 7023596 Americas 84.59043 68.02829 101.15257
## 147 52877.2314 7023596 Americas 85.10479 68.53605 101.67352
## 148 54021.4937 7023596 Americas 85.61914 69.04364 102.19464
## 149 55165.7559 7023596 Americas 86.13349 69.55107 102.71591
## 150 56310.0182 7023596 Americas 86.64784 70.05833 103.23736
## 151 57454.2805 7023596 Americas 87.16220 70.56543 103.75896
## 152 58598.5428 7023596 Americas 87.67655 71.07236 104.28073
## 153 59742.8051 7023596 Americas 88.19090 71.57914 104.80266
## 154 60887.0674 7023596 Americas 88.70525 72.08574 105.32476
## 155 62031.3297 7023596 Americas 89.21961 72.59219 105.84702
## 156 63175.5920 7023596 Americas 89.73396 73.09847 106.36944
## 157 64319.8543 7023596 Americas 90.24831 73.60459 106.89203
## 158 65464.1166 7023596 Americas 90.76266 74.11055 107.41478
## 159 66608.3789 7023596 Americas 91.27702 74.61634 107.93769
## 160 67752.6412 7023596 Americas 91.79137 75.12197 108.46076
## 161 68896.9035 7023596 Americas 92.30572 75.62745 108.98399
## 162 70041.1658 7023596 Americas 92.82007 76.13276 109.50739
## 163 71185.4281 7023596 Americas 93.33443 76.63790 110.03095
## 164 72329.6903 7023596 Americas 93.84878 77.14289 110.55466
## 165 73473.9526 7023596 Americas 94.36313 77.64772 111.07854
## 166 74618.2149 7023596 Americas 94.87748 78.15238 111.60258
## 167 75762.4772 7023596 Americas 95.39184 78.65689 112.12678
## 168 76906.7395 7023596 Americas 95.90619 79.16124 112.65114
## 169 78051.0018 7023596 Americas 96.42054 79.66542 113.17566
## 170 79195.2641 7023596 Americas 96.93489 80.16945 113.70033
## 171 80339.5264 7023596 Americas 97.44925 80.67332 114.22517
## 172 81483.7887 7023596 Americas 97.96360 81.17703 114.75016
## 173 82628.0510 7023596 Americas 98.47795 81.68058 115.27532
## 174 83772.3133 7023596 Americas 98.99230 82.18398 115.80063
## 175 84916.5756 7023596 Americas 99.50666 82.68721 116.32610
## 176 86060.8379 7023596 Americas 100.02101 83.19029 116.85172
## 177 87205.1002 7023596 Americas 100.53536 83.69321 117.37751
## 178 88349.3625 7023596 Americas 101.04971 84.19598 117.90345
## 179 89493.6247 7023596 Americas 101.56406 84.69859 118.42954
## 180 90637.8870 7023596 Americas 102.07842 85.20104 118.95580
## 181 91782.1493 7023596 Americas 102.59277 85.70334 119.48220
## 182 92926.4116 7023596 Americas 103.10712 86.20548 120.00877
## 183 94070.6739 7023596 Americas 103.62147 86.70746 120.53549
## 184 95214.9362 7023596 Americas 104.13583 87.20929 121.06236
## 185 96359.1985 7023596 Americas 104.65018 87.71097 121.58939
## 186 97503.4608 7023596 Americas 105.16453 88.21249 122.11657
## 187 98647.7231 7023596 Americas 105.67888 88.71386 122.64391
## 188 99791.9854 7023596 Americas 106.19324 89.21508 123.17140
## 189 100936.2477 7023596 Americas 106.70759 89.71614 123.69904
## 190 102080.5100 7023596 Americas 107.22194 90.21705 124.22684
## 191 103224.7723 7023596 Americas 107.73629 90.71781 124.75478
## 192 104369.0346 7023596 Americas 108.25065 91.21841 125.28288
## 193 105513.2968 7023596 Americas 108.76500 91.71886 125.81114
## 194 106657.5591 7023596 Americas 109.27935 92.21917 126.33954
## 195 107801.8214 7023596 Americas 109.79370 92.71932 126.86809
## 196 108946.0837 7023596 Americas 110.30806 93.21932 127.39679
## 197 110090.3460 7023596 Americas 110.82241 93.71917 127.92565
## 198 111234.6083 7023596 Americas 111.33676 94.21887 128.45465
## 199 112378.8706 7023596 Americas 111.85111 94.71842 128.98380
## 200 113523.1329 7023596 Americas 112.36547 95.21783 129.51311
## 201 241.1659 7023596 Asia 56.16126 39.72673 72.59578
## 202 1385.4282 7023596 Asia 56.67561 40.24218 73.10904
## 203 2529.6905 7023596 Asia 57.18996 40.75746 73.62247
## 204 3673.9528 7023596 Asia 57.70432 41.27256 74.13607
## 205 4818.2150 7023596 Asia 58.21867 41.78750 74.64984
## 206 5962.4773 7023596 Asia 58.73302 42.30227 75.16377
## 207 7106.7396 7023596 Asia 59.24737 42.81687 75.67788
## 208 8251.0019 7023596 Asia 59.76173 43.33131 76.19215
## 209 9395.2642 7023596 Asia 60.27608 43.84557 76.70659
## 210 10539.5265 7023596 Asia 60.79043 44.35967 77.22120
## 211 11683.7888 7023596 Asia 61.30478 44.87359 77.73598
## 212 12828.0511 7023596 Asia 61.81914 45.38735 78.25092
## 213 13972.3134 7023596 Asia 62.33349 45.90094 78.76604
## 214 15116.5757 7023596 Asia 62.84784 46.41436 79.28133
## 215 16260.8380 7023596 Asia 63.36219 46.92761 79.79678
## 216 17405.1003 7023596 Asia 63.87655 47.44069 80.31240
## 217 18549.3626 7023596 Asia 64.39090 47.95361 80.82819
## 218 19693.6249 7023596 Asia 64.90525 48.46635 81.34415
## 219 20837.8872 7023596 Asia 65.41960 48.97893 81.86028
## 220 21982.1494 7023596 Asia 65.93396 49.49134 82.37658
## 221 23126.4117 7023596 Asia 66.44831 50.00358 82.89304
## 222 24270.6740 7023596 Asia 66.96266 50.51565 83.40967
## 223 25414.9363 7023596 Asia 67.47701 51.02755 83.92647
## 224 26559.1986 7023596 Asia 67.99137 51.53929 84.44344
## 225 27703.4609 7023596 Asia 68.50572 52.05086 84.96058
## 226 28847.7232 7023596 Asia 69.02007 52.56226 85.47789
## 227 29991.9855 7023596 Asia 69.53442 53.07349 85.99536
## 228 31136.2478 7023596 Asia 70.04878 53.58455 86.51300
## 229 32280.5101 7023596 Asia 70.56313 54.09545 87.03081
## 230 33424.7724 7023596 Asia 71.07748 54.60618 87.54879
## 231 34569.0347 7023596 Asia 71.59183 55.11674 88.06693
## 232 35713.2970 7023596 Asia 72.10619 55.62713 88.58524
## 233 36857.5593 7023596 Asia 72.62054 56.13736 89.10372
## 234 38001.8216 7023596 Asia 73.13489 56.64741 89.62237
## 235 39146.0838 7023596 Asia 73.64924 57.15731 90.14118
## 236 40290.3461 7023596 Asia 74.16360 57.66703 90.66016
## 237 41434.6084 7023596 Asia 74.67795 58.17659 91.17931
## 238 42578.8707 7023596 Asia 75.19230 58.68598 91.69862
## 239 43723.1330 7023596 Asia 75.70665 59.19521 92.21810
## 240 44867.3953 7023596 Asia 76.22101 59.70427 92.73775
## 241 46011.6576 7023596 Asia 76.73536 60.21316 93.25756
## 242 47155.9199 7023596 Asia 77.24971 60.72189 93.77754
## 243 48300.1822 7023596 Asia 77.76406 61.23045 94.29768
## 244 49444.4445 7023596 Asia 78.27842 61.73884 94.81799
## 245 50588.7068 7023596 Asia 78.79277 62.24707 95.33847
## 246 51732.9691 7023596 Asia 79.30712 62.75514 95.85911
## 247 52877.2314 7023596 Asia 79.82147 63.26304 96.37991
## 248 54021.4937 7023596 Asia 80.33583 63.77077 96.90088
## 249 55165.7559 7023596 Asia 80.85018 64.27834 97.42202
## 250 56310.0182 7023596 Asia 81.36453 64.78575 97.94332
## 251 57454.2805 7023596 Asia 81.87888 65.29299 98.46478
## 252 58598.5428 7023596 Asia 82.39324 65.80007 98.98641
## 253 59742.8051 7023596 Asia 82.90759 66.30698 99.50820
## 254 60887.0674 7023596 Asia 83.42194 66.81373 100.03015
## 255 62031.3297 7023596 Asia 83.93629 67.32031 100.55227
## 256 63175.5920 7023596 Asia 84.45065 67.82674 101.07456
## 257 64319.8543 7023596 Asia 84.96500 68.33300 101.59700
## 258 65464.1166 7023596 Asia 85.47935 68.83910 102.11961
## 259 66608.3789 7023596 Asia 85.99370 69.34503 102.64238
## 260 67752.6412 7023596 Asia 86.50806 69.85080 103.16531
## 261 68896.9035 7023596 Asia 87.02241 70.35641 103.68840
## 262 70041.1658 7023596 Asia 87.53676 70.86186 104.21166
## 263 71185.4281 7023596 Asia 88.05111 71.36715 104.73508
## 264 72329.6903 7023596 Asia 88.56547 71.87228 105.25866
## 265 73473.9526 7023596 Asia 89.07982 72.37724 105.78239
## 266 74618.2149 7023596 Asia 89.59417 72.88205 106.30629
## 267 75762.4772 7023596 Asia 90.10852 73.38669 106.83036
## 268 76906.7395 7023596 Asia 90.62288 73.89118 107.35458
## 269 78051.0018 7023596 Asia 91.13723 74.39550 107.87896
## 270 79195.2641 7023596 Asia 91.65158 74.89967 108.40350
## 271 80339.5264 7023596 Asia 92.16593 75.40367 108.92820
## 272 81483.7887 7023596 Asia 92.68029 75.90752 109.45305
## 273 82628.0510 7023596 Asia 93.19464 76.41121 109.97807
## 274 83772.3133 7023596 Asia 93.70899 76.91474 110.50325
## 275 84916.5756 7023596 Asia 94.22334 77.41811 111.02858
## 276 86060.8379 7023596 Asia 94.73770 77.92132 111.55407
## 277 87205.1002 7023596 Asia 95.25205 78.42438 112.07972
## 278 88349.3625 7023596 Asia 95.76640 78.92728 112.60552
## 279 89493.6247 7023596 Asia 96.28075 79.43002 113.13148
## 280 90637.8870 7023596 Asia 96.79511 79.93261 113.65760
## 281 91782.1493 7023596 Asia 97.30946 80.43504 114.18388
## 282 92926.4116 7023596 Asia 97.82381 80.93731 114.71031
## 283 94070.6739 7023596 Asia 98.33816 81.43943 115.23689
## 284 95214.9362 7023596 Asia 98.85252 81.94140 115.76364
## 285 96359.1985 7023596 Asia 99.36687 82.44321 116.29053
## 286 97503.4608 7023596 Asia 99.88122 82.94486 116.81758
## 287 98647.7231 7023596 Asia 100.39557 83.44636 117.34479
## 288 99791.9854 7023596 Asia 100.90993 83.94771 117.87214
## 289 100936.2477 7023596 Asia 101.42428 84.44890 118.39966
## 290 102080.5100 7023596 Asia 101.93863 84.94994 118.92732
## 291 103224.7723 7023596 Asia 102.45298 85.45083 119.45514
## 292 104369.0346 7023596 Asia 102.96734 85.95156 119.98311
## 293 105513.2968 7023596 Asia 103.48169 86.45215 120.51123
## 294 106657.5591 7023596 Asia 103.99604 86.95258 121.03950
## 295 107801.8214 7023596 Asia 104.51039 87.45286 121.56793
## 296 108946.0837 7023596 Asia 105.02475 87.95299 122.09650
## 297 110090.3460 7023596 Asia 105.53910 88.45297 122.62523
## 298 111234.6083 7023596 Asia 106.05345 88.95280 123.15410
## 299 112378.8706 7023596 Asia 106.56780 89.45248 123.68313
## 300 113523.1329 7023596 Asia 107.08216 89.95201 124.21230
## 301 241.1659 7023596 Europe 65.44132 48.99789 81.88475
## 302 1385.4282 7023596 Europe 65.95567 49.51426 82.39708
## 303 2529.6905 7023596 Europe 66.47003 50.03047 82.90959
## 304 3673.9528 7023596 Europe 66.98438 50.54650 83.42226
## 305 4818.2150 7023596 Europe 67.49873 51.06237 83.93509
## 306 5962.4773 7023596 Europe 68.01308 51.57806 84.44810
## 307 7106.7396 7023596 Europe 68.52744 52.09359 84.96128
## 308 8251.0019 7023596 Europe 69.04179 52.60896 85.47462
## 309 9395.2642 7023596 Europe 69.55614 53.12415 85.98813
## 310 10539.5265 7023596 Europe 70.07049 53.63917 86.50182
## 311 11683.7888 7023596 Europe 70.58485 54.15402 87.01567
## 312 12828.0511 7023596 Europe 71.09920 54.66871 87.52968
## 313 13972.3134 7023596 Europe 71.61355 55.18323 88.04387
## 314 15116.5757 7023596 Europe 72.12790 55.69758 88.55823
## 315 16260.8380 7023596 Europe 72.64226 56.21176 89.07276
## 316 17405.1003 7023596 Europe 73.15661 56.72577 89.58745
## 317 18549.3626 7023596 Europe 73.67096 57.23961 90.10231
## 318 19693.6249 7023596 Europe 74.18531 57.75328 90.61734
## 319 20837.8872 7023596 Europe 74.69967 58.26679 91.13254
## 320 21982.1494 7023596 Europe 75.21402 58.78012 91.64791
## 321 23126.4117 7023596 Europe 75.72837 59.29329 92.16345
## 322 24270.6740 7023596 Europe 76.24272 59.80629 92.67916
## 323 25414.9363 7023596 Europe 76.75708 60.31912 93.19503
## 324 26559.1986 7023596 Europe 77.27143 60.83178 93.71108
## 325 27703.4609 7023596 Europe 77.78578 61.34427 94.22729
## 326 28847.7232 7023596 Europe 78.30013 61.85660 94.74367
## 327 29991.9855 7023596 Europe 78.81449 62.36875 95.26022
## 328 31136.2478 7023596 Europe 79.32884 62.88074 95.77693
## 329 32280.5101 7023596 Europe 79.84319 63.39256 96.29382
## 330 33424.7724 7023596 Europe 80.35754 63.90421 96.81087
## 331 34569.0347 7023596 Europe 80.87190 64.41570 97.32810
## 332 35713.2970 7023596 Europe 81.38625 64.92701 97.84548
## 333 36857.5593 7023596 Europe 81.90060 65.43816 98.36304
## 334 38001.8216 7023596 Europe 82.41495 65.94914 98.88077
## 335 39146.0838 7023596 Europe 82.92931 66.45995 99.39866
## 336 40290.3461 7023596 Europe 83.44366 66.97059 99.91672
## 337 41434.6084 7023596 Europe 83.95801 67.48107 100.43495
## 338 42578.8707 7023596 Europe 84.47236 67.99138 100.95334
## 339 43723.1330 7023596 Europe 84.98671 68.50152 101.47191
## 340 44867.3953 7023596 Europe 85.50107 69.01150 101.99064
## 341 46011.6576 7023596 Europe 86.01542 69.52131 102.50953
## 342 47155.9199 7023596 Europe 86.52977 70.03095 103.02860
## 343 48300.1822 7023596 Europe 87.04412 70.54042 103.54783
## 344 49444.4445 7023596 Europe 87.55848 71.04973 104.06722
## 345 50588.7068 7023596 Europe 88.07283 71.55887 104.58678
## 346 51732.9691 7023596 Europe 88.58718 72.06785 105.10651
## 347 52877.2314 7023596 Europe 89.10153 72.57666 105.62641
## 348 54021.4937 7023596 Europe 89.61589 73.08530 106.14647
## 349 55165.7559 7023596 Europe 90.13024 73.59378 106.66670
## 350 56310.0182 7023596 Europe 90.64459 74.10210 107.18709
## 351 57454.2805 7023596 Europe 91.15894 74.61024 107.70765
## 352 58598.5428 7023596 Europe 91.67330 75.11822 108.22837
## 353 59742.8051 7023596 Europe 92.18765 75.62604 108.74926
## 354 60887.0674 7023596 Europe 92.70200 76.13369 109.27031
## 355 62031.3297 7023596 Europe 93.21635 76.64118 109.79153
## 356 63175.5920 7023596 Europe 93.73071 77.14851 110.31291
## 357 64319.8543 7023596 Europe 94.24506 77.65566 110.83445
## 358 65464.1166 7023596 Europe 94.75941 78.16266 111.35616
## 359 66608.3789 7023596 Europe 95.27376 78.66949 111.87804
## 360 67752.6412 7023596 Europe 95.78812 79.17616 112.40008
## 361 68896.9035 7023596 Europe 96.30247 79.68266 112.92228
## 362 70041.1658 7023596 Europe 96.81682 80.18901 113.44464
## 363 71185.4281 7023596 Europe 97.33117 80.69518 113.96716
## 364 72329.6903 7023596 Europe 97.84553 81.20120 114.48985
## 365 73473.9526 7023596 Europe 98.35988 81.70705 115.01270
## 366 74618.2149 7023596 Europe 98.87423 82.21275 115.53572
## 367 75762.4772 7023596 Europe 99.38858 82.71828 116.05889
## 368 76906.7395 7023596 Europe 99.90294 83.22364 116.58223
## 369 78051.0018 7023596 Europe 100.41729 83.72885 117.10573
## 370 79195.2641 7023596 Europe 100.93164 84.23390 117.62939
## 371 80339.5264 7023596 Europe 101.44599 84.73878 118.15321
## 372 81483.7887 7023596 Europe 101.96035 85.24351 118.67719
## 373 82628.0510 7023596 Europe 102.47470 85.74807 119.20133
## 374 83772.3133 7023596 Europe 102.98905 86.25248 119.72563
## 375 84916.5756 7023596 Europe 103.50340 86.75672 120.25009
## 376 86060.8379 7023596 Europe 104.01776 87.26081 120.77471
## 377 87205.1002 7023596 Europe 104.53211 87.76473 121.29949
## 378 88349.3625 7023596 Europe 105.04646 88.26850 121.82442
## 379 89493.6247 7023596 Europe 105.56081 88.77211 122.34952
## 380 90637.8870 7023596 Europe 106.07517 89.27556 122.87477
## 381 91782.1493 7023596 Europe 106.58952 89.77885 123.40019
## 382 92926.4116 7023596 Europe 107.10387 90.28199 123.92576
## 383 94070.6739 7023596 Europe 107.61822 90.78497 124.45148
## 384 95214.9362 7023596 Europe 108.13258 91.28779 124.97737
## 385 96359.1985 7023596 Europe 108.64693 91.79045 125.50341
## 386 97503.4608 7023596 Europe 109.16128 92.29296 126.02960
## 387 98647.7231 7023596 Europe 109.67563 92.79531 126.55596
## 388 99791.9854 7023596 Europe 110.18999 93.29751 127.08246
## 389 100936.2477 7023596 Europe 110.70434 93.79955 127.60913
## 390 102080.5100 7023596 Europe 111.21869 94.30144 128.13595
## 391 103224.7723 7023596 Europe 111.73304 94.80317 128.66292
## 392 104369.0346 7023596 Europe 112.24740 95.30475 129.19005
## 393 105513.2968 7023596 Europe 112.76175 95.80617 129.71733
## 394 106657.5591 7023596 Europe 113.27610 96.30744 130.24476
## 395 107801.8214 7023596 Europe 113.79045 96.80856 130.77235
## 396 108946.0837 7023596 Europe 114.30481 97.30952 131.30009
## 397 110090.3460 7023596 Europe 114.81916 97.81033 131.82799
## 398 111234.6083 7023596 Europe 115.33351 98.31099 132.35603
## 399 112378.8706 7023596 Europe 115.84786 98.81150 132.88423
## 400 113523.1329 7023596 Europe 116.36222 99.31186 133.41258
## 401 241.1659 7023596 Oceania 66.05193 49.28474 82.81912
## 402 1385.4282 7023596 Oceania 66.56628 49.80167 83.33090
## 403 2529.6905 7023596 Oceania 67.08064 50.31843 83.84284
## 404 3673.9528 7023596 Oceania 67.59499 50.83503 84.35495
## 405 4818.2150 7023596 Oceania 68.10934 51.35146 84.86722
## 406 5962.4773 7023596 Oceania 68.62369 51.86773 85.37966
## 407 7106.7396 7023596 Oceania 69.13805 52.38383 85.89226
## 408 8251.0019 7023596 Oceania 69.65240 52.89977 86.40503
## 409 9395.2642 7023596 Oceania 70.16675 53.41554 86.91796
## 410 10539.5265 7023596 Oceania 70.68110 53.93115 87.43106
## 411 11683.7888 7023596 Oceania 71.19546 54.44659 87.94433
## 412 12828.0511 7023596 Oceania 71.70981 54.96186 88.45776
## 413 13972.3134 7023596 Oceania 72.22416 55.47697 88.97135
## 414 15116.5757 7023596 Oceania 72.73851 55.99191 89.48511
## 415 16260.8380 7023596 Oceania 73.25287 56.50669 89.99904
## 416 17405.1003 7023596 Oceania 73.76722 57.02130 90.51313
## 417 18549.3626 7023596 Oceania 74.28157 57.53575 91.02739
## 418 19693.6249 7023596 Oceania 74.79592 58.05003 91.54182
## 419 20837.8872 7023596 Oceania 75.31028 58.56415 92.05641
## 420 21982.1494 7023596 Oceania 75.82463 59.07810 92.57116
## 421 23126.4117 7023596 Oceania 76.33898 59.59188 93.08608
## 422 24270.6740 7023596 Oceania 76.85333 60.10550 93.60117
## 423 25414.9363 7023596 Oceania 77.36769 60.61896 94.11642
## 424 26559.1986 7023596 Oceania 77.88204 61.13224 94.63183
## 425 27703.4609 7023596 Oceania 78.39639 61.64537 95.14742
## 426 28847.7232 7023596 Oceania 78.91074 62.15832 95.66316
## 427 29991.9855 7023596 Oceania 79.42510 62.67112 96.17908
## 428 31136.2478 7023596 Oceania 79.93945 63.18374 96.69516
## 429 32280.5101 7023596 Oceania 80.45380 63.69620 97.21140
## 430 33424.7724 7023596 Oceania 80.96815 64.20850 97.72781
## 431 34569.0347 7023596 Oceania 81.48251 64.72063 98.24438
## 432 35713.2970 7023596 Oceania 81.99686 65.23259 98.76112
## 433 36857.5593 7023596 Oceania 82.51121 65.74440 99.27803
## 434 38001.8216 7023596 Oceania 83.02556 66.25603 99.79510
## 435 39146.0838 7023596 Oceania 83.53992 66.76750 100.31233
## 436 40290.3461 7023596 Oceania 84.05427 67.27881 100.82973
## 437 41434.6084 7023596 Oceania 84.56862 67.78995 101.34729
## 438 42578.8707 7023596 Oceania 85.08297 68.30093 101.86502
## 439 43723.1330 7023596 Oceania 85.59733 68.81174 102.38292
## 440 44867.3953 7023596 Oceania 86.11168 69.32238 102.90097
## 441 46011.6576 7023596 Oceania 86.62603 69.83287 103.41919
## 442 47155.9199 7023596 Oceania 87.14038 70.34319 103.93758
## 443 48300.1822 7023596 Oceania 87.65474 70.85334 104.45613
## 444 49444.4445 7023596 Oceania 88.16909 71.36333 104.97484
## 445 50588.7068 7023596 Oceania 88.68344 71.87316 105.49372
## 446 51732.9691 7023596 Oceania 89.19779 72.38282 106.01276
## 447 52877.2314 7023596 Oceania 89.71215 72.89232 106.53197
## 448 54021.4937 7023596 Oceania 90.22650 73.40166 107.05134
## 449 55165.7559 7023596 Oceania 90.74085 73.91083 107.57087
## 450 56310.0182 7023596 Oceania 91.25520 74.41984 108.09056
## 451 57454.2805 7023596 Oceania 91.76956 74.92869 108.61042
## 452 58598.5428 7023596 Oceania 92.28391 75.43738 109.13044
## 453 59742.8051 7023596 Oceania 92.79826 75.94590 109.65063
## 454 60887.0674 7023596 Oceania 93.31261 76.45426 110.17097
## 455 62031.3297 7023596 Oceania 93.82697 76.96245 110.69148
## 456 63175.5920 7023596 Oceania 94.34132 77.47049 111.21215
## 457 64319.8543 7023596 Oceania 94.85567 77.97836 111.73298
## 458 65464.1166 7023596 Oceania 95.37002 78.48607 112.25397
## 459 66608.3789 7023596 Oceania 95.88438 78.99362 112.77513
## 460 67752.6412 7023596 Oceania 96.39873 79.50101 113.29645
## 461 68896.9035 7023596 Oceania 96.91308 80.00824 113.81792
## 462 70041.1658 7023596 Oceania 97.42743 80.51530 114.33956
## 463 71185.4281 7023596 Oceania 97.94179 81.02221 114.86136
## 464 72329.6903 7023596 Oceania 98.45614 81.52895 115.38332
## 465 73473.9526 7023596 Oceania 98.97049 82.03554 115.90544
## 466 74618.2149 7023596 Oceania 99.48484 82.54196 116.42772
## 467 75762.4772 7023596 Oceania 99.99920 83.04823 116.95016
## 468 76906.7395 7023596 Oceania 100.51355 83.55433 117.47276
## 469 78051.0018 7023596 Oceania 101.02790 84.06028 117.99552
## 470 79195.2641 7023596 Oceania 101.54225 84.56606 118.51844
## 471 80339.5264 7023596 Oceania 102.05661 85.07169 119.04152
## 472 81483.7887 7023596 Oceania 102.57096 85.57716 119.56476
## 473 82628.0510 7023596 Oceania 103.08531 86.08247 120.08815
## 474 83772.3133 7023596 Oceania 103.59966 86.58762 120.61170
## 475 84916.5756 7023596 Oceania 104.11402 87.09262 121.13542
## 476 86060.8379 7023596 Oceania 104.62837 87.59745 121.65928
## 477 87205.1002 7023596 Oceania 105.14272 88.10213 122.18331
## 478 88349.3625 7023596 Oceania 105.65707 88.60665 122.70749
## 479 89493.6247 7023596 Oceania 106.17143 89.11102 123.23183
## 480 90637.8870 7023596 Oceania 106.68578 89.61523 123.75633
## 481 91782.1493 7023596 Oceania 107.20013 90.11928 124.28098
## 482 92926.4116 7023596 Oceania 107.71448 90.62318 124.80579
## 483 94070.6739 7023596 Oceania 108.22884 91.12692 125.33076
## 484 95214.9362 7023596 Oceania 108.74319 91.63050 125.85588
## 485 96359.1985 7023596 Oceania 109.25754 92.13393 126.38115
## 486 97503.4608 7023596 Oceania 109.77189 92.63720 126.90658
## 487 98647.7231 7023596 Oceania 110.28625 93.14032 127.43217
## 488 99791.9854 7023596 Oceania 110.80060 93.64329 127.95791
## 489 100936.2477 7023596 Oceania 111.31495 94.14610 128.48380
## 490 102080.5100 7023596 Oceania 111.82930 94.64876 129.00985
## 491 103224.7723 7023596 Oceania 112.34366 95.15127 129.53605
## 492 104369.0346 7023596 Oceania 112.85801 95.65362 130.06240
## 493 105513.2968 7023596 Oceania 113.37236 96.15582 130.58890
## 494 106657.5591 7023596 Oceania 113.88671 96.65786 131.11556
## 495 107801.8214 7023596 Oceania 114.40107 97.15976 131.64237
## 496 108946.0837 7023596 Oceania 114.91542 97.66150 132.16933
## 497 110090.3460 7023596 Oceania 115.42977 98.16309 132.69645
## 498 111234.6083 7023596 Oceania 115.94412 98.66453 133.22371
## 499 112378.8706 7023596 Oceania 116.45848 99.16582 133.75113
## 500 113523.1329 7023596 Oceania 116.97283 99.66696 134.27869
# モデルの結果を表示する
# アフリカとヨーロッパだけ
p <- ggplot(data = subset(pred_df, continent %in% c("Africa", "Europe")),
mapping = aes(x = gdpPercap, y = fit,
ymin = lwr, ymax = upr,
color = continent, fill = continent,
group = continent))
p + geom_point(data = subset(gapminder, continent %in% c("Africa", "Europe")),
mapping = aes(x = gdpPercap, y = lifeExp,
color = continent),
alpha = 0.5, inherit.aes = FALSE) +
geom_line() +
geom_ribbon(alpha = 0.2, color = NA) +
scale_x_log10(labels = scales::dollar)
実際にpredict()関数を使うよりも便利なパッケージを使うことが多い. しかしながら,predict()a関数は様々なモデルに対して安全に動作するので, predict()関数を理解するのは重要.
broomパッケージ - Rが生成したモデルから作図に使える数値を抽出するための関数を集めたパッケージ - 主な用途はモデルオブジェクトをggplotで使いやすいデータフレームに変換すること 1. モデル自体の構成要素に関わる情報(回帰係数やt検定量など) 2. モデルと元データとの関係を表す観測ベースの情報(各観測値の近似値や残差) 3. モデルの当てはまりに関する情報(F統計量,モデルの逸脱度,決定係数など)
library(broom)
# 線形モデルの作成
out <- lm(formula = lifeExp ~ gdpPercap + pop + continent,
data = gapminder)
summary(out)
##
## Call:
## lm(formula = lifeExp ~ gdpPercap + pop + continent, data = gapminder)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49.161 -4.486 0.297 5.110 25.175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.781e+01 3.395e-01 140.819 < 2e-16 ***
## gdpPercap 4.495e-04 2.346e-05 19.158 < 2e-16 ***
## pop 6.570e-09 1.975e-09 3.326 0.000901 ***
## continentAmericas 1.348e+01 6.000e-01 22.458 < 2e-16 ***
## continentAsia 8.193e+00 5.712e-01 14.342 < 2e-16 ***
## continentEurope 1.747e+01 6.246e-01 27.973 < 2e-16 ***
## continentOceania 1.808e+01 1.782e+00 10.146 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.365 on 1697 degrees of freedom
## Multiple R-squared: 0.5821, Adjusted R-squared: 0.5806
## F-statistic: 393.9 on 6 and 1697 DF, p-value: < 2.2e-16
# モデルの構成要素レベルの抽出
out_comp <- tidy(out)
out_comp
## # A tibble: 7 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 4.78e+1 0.340 141. 0.
## 2 gdpPercap 4.50e-4 0.0000235 19.2 3.24e- 74
## 3 pop 6.57e-9 0.00000000198 3.33 9.01e- 4
## 4 continentAmericas 1.35e+1 0.600 22.5 5.19e- 98
## 5 continentAsia 8.19e+0 0.571 14.3 4.06e- 44
## 6 continentEurope 1.75e+1 0.625 28.0 6.34e-142
## 7 continentOceania 1.81e+1 1.78 10.1 1.59e- 23
# round_df: socvizパッケージ
out_comp %>% as.data.frame() %>% round_df()
## term estimate std.error statistic p.value
## 1 (Intercept) 47.81 0.34 140.82 0
## 2 gdpPercap 0.00 0.00 19.16 0
## 3 pop 0.00 0.00 3.33 0
## 4 continentAmericas 13.48 0.60 22.46 0
## 5 continentAsia 8.19 0.57 14.34 0
## 6 continentEurope 17.47 0.62 27.97 0
## 7 continentOceania 18.08 1.78 10.15 0
#線形回帰モデルの推定料の図示
p <- ggplot(data = out_comp,
mapping = aes(x = term, y = estimate))
p + geom_point() +
coord_flip()
# 推定値の信頼区間を求める
out_conf <- tidy(out, conf.int = TRUE)
out_conf %>% as.data.frame() %>% round_df()
## term estimate std.error statistic p.value conf.low conf.high
## 1 (Intercept) 47.81 0.34 140.82 0 47.15 48.48
## 2 gdpPercap 0.00 0.00 19.16 0 0.00 0.00
## 3 pop 0.00 0.00 3.33 0 0.00 0.00
## 4 continentAmericas 13.48 0.60 22.46 0 12.30 14.65
## 5 continentAsia 8.19 0.57 14.34 0 7.07 9.31
## 6 continentEurope 17.47 0.62 27.97 0 16.25 18.70
## 7 continentOceania 18.08 1.78 10.15 0 14.59 21.58
# 切片を削除する
out_conf <- subset(out_conf, term %nin% "(Intercept)")
# 大陸名を調整する
out_conf$nicelabs <- prefix_strip(out_conf$term, "continent")
out_conf
## # A tibble: 6 x 8
## term estimate std.error statistic p.value conf.low conf.high nicelabs
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 gdpPercap 4.50e-4 2.35e-5 19.2 3.24e- 74 4.03e-4 4.96e-4 gdpPerc…
## 2 pop 6.57e-9 1.98e-9 3.33 9.01e- 4 2.70e-9 1.04e-8 Pop
## 3 continen… 1.35e+1 6.00e-1 22.5 5.19e- 98 1.23e+1 1.47e+1 Americas
## 4 continen… 8.19e+0 5.71e-1 14.3 4.06e- 44 7.07e+0 9.31e+0 Asia
## 5 continen… 1.75e+1 6.25e-1 28.0 6.34e-142 1.62e+1 1.87e+1 Europe
## 6 continen… 1.81e+1 1.78e+0 10.1 1.59e- 23 1.46e+1 2.16e+1 Oceania
p <- ggplot(data = out_conf,
mapping = aes(x = reorder(nicelabs, estimate),
y = estimate,
ymin = conf.low, ymax = conf.high))
p + geom_pointrange() +
coord_flip()
augment()関数: 元データの観測レベルから計算される統計量.変数名の先頭には.がついている
out_aug <- augment(out)
head(out_aug) %>% as.data.frame() %>% round_df()
## lifeExp gdpPercap pop continent .fitted .resid .hat .sigma .cooksd
## 1 28.80 779.45 8425333 Asia 56.41 -27.61 0 8.34 0.01
## 2 30.33 820.85 9240934 Asia 56.44 -26.10 0 8.34 0.00
## 3 32.00 853.10 10267083 Asia 56.46 -24.46 0 8.35 0.00
## 4 34.02 836.20 11537966 Asia 56.46 -22.44 0 8.35 0.00
## 5 36.09 739.98 13079460 Asia 56.43 -20.34 0 8.35 0.00
## 6 38.44 786.11 14880372 Asia 56.46 -18.02 0 8.36 0.00
## .std.resid
## 1 -3.31
## 2 -3.13
## 3 -2.93
## 4 -2.69
## 5 -2.44
## 6 -2.16
# augment()関数の引数にdataを指定すると,変数全てを追加できる
out_aug <- augment(out, data = gapminder)
head(out_aug) %>% as.data.frame() %>% round_df()
## country continent year lifeExp pop gdpPercap .fitted .resid .hat
## 1 Afghanistan Asia 1952 28.80 8425333 779.45 56.41 -27.61 0
## 2 Afghanistan Asia 1957 30.33 9240934 820.85 56.44 -26.10 0
## 3 Afghanistan Asia 1962 32.00 10267083 853.10 56.46 -24.46 0
## 4 Afghanistan Asia 1967 34.02 11537966 836.20 56.46 -22.44 0
## 5 Afghanistan Asia 1972 36.09 13079460 739.98 56.43 -20.34 0
## 6 Afghanistan Asia 1977 38.44 14880372 786.11 56.46 -18.02 0
## .sigma .cooksd .std.resid
## 1 8.34 0.01 -3.31
## 2 8.34 0.00 -3.13
## 3 8.35 0.00 -2.93
## 4 8.35 0.00 -2.69
## 5 8.35 0.00 -2.44
## 6 8.36 0.00 -2.16
# 予測値 vs 残差プロット
p <- ggplot(data = out_aug,
mapping = aes(x = .fitted, y = .resid))
p + geom_point()
glance()関数: モデルオブジェクトにsummary()関数を適用した時の結果を整理するための関数. 真の力はデータをグループ化したり,モデルの一部を抽出してスケールアップができたりする点にある
glance(out)
## # A tibble: 1 x 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.582 0.581 8.37 394. 3.94e-317 6 -6034. 12084. 12127.
## # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
しかし,broomパッケージのtidy(),augment(),glance()関数は全てのクラスのモデルに対して,全ての機能を使えるわけではない
library(survival)
head(lung)
## inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
## 1 3 306 2 74 1 1 90 100 1175 NA
## 2 3 455 2 68 1 0 90 90 1225 15
## 3 3 1010 1 56 1 0 90 90 NA 15
## 4 5 210 2 57 1 1 90 60 1150 11
## 5 1 883 2 60 1 0 100 90 NA 0
## 6 12 1022 1 74 1 1 50 80 513 0
?lung
# Surv()関数を使ってCox比例ハザードモデルの応答変数,アウトカムの変数を作成し,
# 次のcpxph()関数で予測値を算出する
out_cph <- coxph(Surv(time, status) ~ age + sex,
data = lung)
# survfit()関数をつかって,モデルから生存曲線を作成する
out_surv <- survfit(out_cph)
summary(out_surv)
## Call: survfit(formula = out_cph)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 5 228 1 0.9958 0.00417 0.9877 1.000
## 11 227 3 0.9833 0.00831 0.9671 1.000
## 12 224 1 0.9791 0.00928 0.9611 0.997
## 13 223 2 0.9706 0.01096 0.9494 0.992
## 15 221 1 0.9664 0.01170 0.9438 0.990
## 26 220 1 0.9622 0.01240 0.9382 0.987
## 30 219 1 0.9579 0.01305 0.9327 0.984
## 31 218 1 0.9537 0.01368 0.9273 0.981
## 53 217 2 0.9452 0.01484 0.9165 0.975
## 54 215 1 0.9409 0.01538 0.9112 0.972
## 59 214 1 0.9366 0.01590 0.9060 0.968
## 60 213 2 0.9280 0.01689 0.8955 0.962
## 61 211 1 0.9237 0.01735 0.8903 0.958
## 62 210 1 0.9194 0.01780 0.8852 0.955
## 65 209 2 0.9109 0.01865 0.8751 0.948
## 71 207 1 0.9066 0.01906 0.8700 0.945
## 79 206 1 0.9023 0.01945 0.8650 0.941
## 81 205 2 0.8937 0.02020 0.8550 0.934
## 88 203 2 0.8851 0.02091 0.8451 0.927
## 92 201 1 0.8809 0.02126 0.8402 0.924
## 93 199 1 0.8766 0.02159 0.8352 0.920
## 95 198 2 0.8679 0.02224 0.8254 0.913
## 105 196 1 0.8636 0.02256 0.8205 0.909
## 107 194 2 0.8549 0.02317 0.8107 0.902
## 110 192 1 0.8506 0.02346 0.8058 0.898
## 116 191 1 0.8462 0.02375 0.8009 0.894
## 118 190 1 0.8419 0.02403 0.7961 0.890
## 122 189 1 0.8375 0.02431 0.7912 0.887
## 131 188 1 0.8332 0.02458 0.7863 0.883
## 132 187 2 0.8244 0.02510 0.7767 0.875
## 135 185 1 0.8201 0.02535 0.7719 0.871
## 142 184 1 0.8157 0.02560 0.7671 0.867
## 144 183 1 0.8113 0.02584 0.7622 0.864
## 145 182 2 0.8026 0.02631 0.7527 0.856
## 147 180 1 0.7982 0.02654 0.7479 0.852
## 153 179 1 0.7939 0.02676 0.7431 0.848
## 156 178 2 0.7852 0.02719 0.7336 0.840
## 163 176 3 0.7720 0.02780 0.7194 0.828
## 166 173 2 0.7632 0.02819 0.7099 0.821
## 167 171 1 0.7588 0.02837 0.7052 0.817
## 170 170 1 0.7545 0.02856 0.7005 0.813
## 175 167 1 0.7500 0.02874 0.6958 0.809
## 176 165 1 0.7456 0.02892 0.6910 0.804
## 177 164 1 0.7411 0.02910 0.6862 0.800
## 179 162 2 0.7321 0.02946 0.6766 0.792
## 180 160 1 0.7276 0.02963 0.6717 0.788
## 181 159 2 0.7185 0.02996 0.6621 0.780
## 182 157 1 0.7140 0.03012 0.6573 0.776
## 183 156 1 0.7095 0.03028 0.6525 0.771
## 186 154 1 0.7049 0.03044 0.6477 0.767
## 189 152 1 0.7003 0.03059 0.6428 0.763
## 194 149 1 0.6957 0.03075 0.6379 0.759
## 197 147 1 0.6910 0.03091 0.6330 0.754
## 199 145 1 0.6863 0.03106 0.6280 0.750
## 201 144 2 0.6769 0.03136 0.6181 0.741
## 202 142 1 0.6722 0.03150 0.6132 0.737
## 207 139 1 0.6674 0.03165 0.6082 0.732
## 208 138 1 0.6627 0.03179 0.6032 0.728
## 210 137 1 0.6579 0.03192 0.5982 0.724
## 212 135 1 0.6531 0.03206 0.5932 0.719
## 218 134 1 0.6483 0.03219 0.5882 0.715
## 222 132 1 0.6435 0.03232 0.5832 0.710
## 223 130 1 0.6386 0.03246 0.5781 0.706
## 226 126 1 0.6336 0.03260 0.5728 0.701
## 229 125 1 0.6286 0.03274 0.5676 0.696
## 230 124 1 0.6236 0.03287 0.5624 0.691
## 239 121 2 0.6133 0.03314 0.5517 0.682
## 245 117 1 0.6081 0.03327 0.5463 0.677
## 246 116 1 0.6030 0.03340 0.5409 0.672
## 267 112 1 0.5977 0.03354 0.5354 0.667
## 268 111 1 0.5924 0.03367 0.5299 0.662
## 269 110 1 0.5871 0.03379 0.5244 0.657
## 270 108 1 0.5817 0.03392 0.5189 0.652
## 283 104 1 0.5762 0.03405 0.5132 0.647
## 284 103 1 0.5707 0.03419 0.5075 0.642
## 285 101 2 0.5595 0.03444 0.4959 0.631
## 286 99 1 0.5538 0.03456 0.4901 0.626
## 288 98 1 0.5482 0.03468 0.4843 0.621
## 291 97 1 0.5426 0.03479 0.4785 0.615
## 293 94 1 0.5368 0.03490 0.4726 0.610
## 301 91 1 0.5310 0.03502 0.4666 0.604
## 303 89 1 0.5250 0.03514 0.4604 0.599
## 305 87 1 0.5189 0.03526 0.4542 0.593
## 306 86 1 0.5129 0.03537 0.4480 0.587
## 310 85 2 0.5007 0.03558 0.4356 0.576
## 320 82 1 0.4946 0.03568 0.4294 0.570
## 329 81 1 0.4884 0.03577 0.4231 0.564
## 337 79 1 0.4822 0.03586 0.4168 0.558
## 340 78 1 0.4760 0.03594 0.4105 0.552
## 345 77 1 0.4698 0.03601 0.4043 0.546
## 348 76 1 0.4636 0.03607 0.3981 0.540
## 350 75 1 0.4575 0.03612 0.3919 0.534
## 351 74 1 0.4514 0.03617 0.3858 0.528
## 353 73 2 0.4391 0.03623 0.3735 0.516
## 361 70 1 0.4329 0.03626 0.3674 0.510
## 363 69 2 0.4205 0.03629 0.3551 0.498
## 364 67 1 0.4143 0.03629 0.3489 0.492
## 371 65 2 0.4017 0.03629 0.3365 0.479
## 387 60 1 0.3952 0.03629 0.3301 0.473
## 390 59 1 0.3887 0.03629 0.3237 0.467
## 394 58 1 0.3822 0.03627 0.3173 0.460
## 426 55 1 0.3753 0.03628 0.3106 0.454
## 428 54 1 0.3685 0.03627 0.3038 0.447
## 429 53 1 0.3616 0.03625 0.2971 0.440
## 433 52 1 0.3547 0.03622 0.2904 0.433
## 442 51 1 0.3478 0.03618 0.2837 0.426
## 444 50 1 0.3409 0.03612 0.2770 0.420
## 450 48 1 0.3338 0.03607 0.2701 0.413
## 455 47 1 0.3267 0.03600 0.2633 0.405
## 457 46 1 0.3196 0.03592 0.2564 0.398
## 460 44 1 0.3123 0.03584 0.2494 0.391
## 473 43 1 0.3049 0.03575 0.2423 0.384
## 477 42 1 0.2976 0.03564 0.2353 0.376
## 519 39 1 0.2899 0.03554 0.2280 0.369
## 520 38 1 0.2822 0.03543 0.2206 0.361
## 524 37 2 0.2668 0.03514 0.2061 0.345
## 533 34 1 0.2590 0.03498 0.1987 0.337
## 550 32 1 0.2510 0.03481 0.1913 0.329
## 558 30 1 0.2428 0.03463 0.1836 0.321
## 567 28 1 0.2344 0.03445 0.1757 0.313
## 574 27 1 0.2259 0.03425 0.1678 0.304
## 583 26 1 0.2173 0.03401 0.1599 0.295
## 613 24 1 0.2083 0.03378 0.1516 0.286
## 624 23 1 0.1992 0.03351 0.1433 0.277
## 641 22 1 0.1901 0.03320 0.1350 0.268
## 643 21 1 0.1811 0.03283 0.1269 0.258
## 654 20 1 0.1719 0.03243 0.1187 0.249
## 655 19 1 0.1627 0.03196 0.1107 0.239
## 687 18 1 0.1535 0.03145 0.1027 0.229
## 689 17 1 0.1444 0.03088 0.0950 0.220
## 705 16 1 0.1352 0.03025 0.0872 0.210
## 707 15 1 0.1263 0.02954 0.0799 0.200
## 728 14 1 0.1172 0.02878 0.0725 0.190
## 731 13 1 0.1083 0.02794 0.0653 0.180
## 735 12 1 0.0995 0.02703 0.0584 0.169
## 765 10 1 0.0904 0.02606 0.0514 0.159
## 791 9 1 0.0817 0.02499 0.0449 0.149
## 814 7 1 0.0719 0.02387 0.0375 0.138
## 883 4 1 0.0577 0.02303 0.0264 0.126
# 予測した生存曲線を作図する
out_tidy <- tidy(out_surv)
out_tidy
## # A tibble: 186 x 8
## time n.risk n.event n.censor estimate std.error conf.high conf.low
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 5 228 1 0 0.996 0.00419 1 0.988
## 2 11 227 3 0 0.983 0.00845 1.00 0.967
## 3 12 224 1 0 0.979 0.00947 0.997 0.961
## 4 13 223 2 0 0.971 0.0113 0.992 0.949
## 5 15 221 1 0 0.966 0.0121 0.990 0.944
## 6 26 220 1 0 0.962 0.0129 0.987 0.938
## 7 30 219 1 0 0.958 0.0136 0.984 0.933
## 8 31 218 1 0 0.954 0.0143 0.981 0.927
## 9 53 217 2 0 0.945 0.0157 0.975 0.917
## 10 54 215 1 0 0.941 0.0163 0.972 0.911
## # … with 176 more rows
p <- ggplot(data = out_tidy,
mapping = aes(x = time,
y = estimate))
p + geom_line() +
geom_ribbon(mapping = aes(ymin = conf.low,
ymax = conf.high),
alpha = 0.2) +
labs(title = "Kaplan-Meier method")
備考
カプランマイヤー法
イベントが発生するまでの時間(生存時間)分析する生存時間分析の手法. 期間内にイベントが起きなかった例を「打ち切り」として分析に含めることが出来る.
例
手術から再発までの時間を分析する際に,途中で転院したり,治療を中断したケースは,少なくとも再発していないためイベントに該当しないが,欠損値として分析から除外するとバイアスがかかる.打ち切り例は,イベントが起きる(打ち切りになる)までは生存率の計算に寄与し,打ち切り後はケースから除外される
broomパッケージを使うと,データのサブセットにモデルを適用し, それぞれのサブセットごとに当てはめたモデルの結果をまとめた表を作ることが可能
# gapminderデータセットから1977年のEuropeのデータを抽出し
# モデルを作成する
eu77 <- gapminder %>% filter(continent == "Europe", year == 1977)
fit <- lm(formula = lifeExp ~ log(gdpPercap),
data = eu77)
summary(fit)
##
## Call:
## lm(formula = lifeExp ~ log(gdpPercap), data = eu77)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.4956 -1.0306 0.0935 1.1755 3.7125
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.489 7.161 4.118 0.000306 ***
## log(gdpPercap) 4.488 0.756 5.936 2.17e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.114 on 28 degrees of freedom
## Multiple R-squared: 0.5572, Adjusted R-squared: 0.5414
## F-statistic: 35.24 on 1 and 28 DF, p-value: 2.173e-06
fit_comp <- tidy(fit, conf.int = TRUE)
fit_comp
## # A tibble: 2 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 29.5 7.16 4.12 0.000306 14.8 44.2
## 2 log(gdpPercap) 4.49 0.756 5.94 0.00000217 2.94 6.04
# 推定値の信頼区間を求める
p <- ggplot(data = fit_comp,
mapping = aes(x = term,
y = estimate,
ymin = conf.low,
ymax = conf.high))
p + geom_pointrange() +
coord_flip()
dplyrとbroomを使うことで大陸ー年ごとに層別化されたデータを コンパクトかつtidyな方法で処理・解析できる
#nest: tidyrパッケージの関数
out_le <- gapminder %>%
group_by(continent, year) %>%
nest() # .key で名前を変更できる
head(out_le)
## # A tibble: 6 x 3
## # Groups: continent, year [6]
## continent year data
## <fct> <int> <list>
## 1 Asia 1952 <tibble [33 × 4]>
## 2 Asia 1957 <tibble [33 × 4]>
## 3 Asia 1962 <tibble [33 × 4]>
## 4 Asia 1967 <tibble [33 × 4]>
## 5 Asia 1972 <tibble [33 × 4]>
## 6 Asia 1977 <tibble [33 × 4]>
?nest
# nest()関数によって,
# 表形式のまま複雑なオブジェクト(リスト)を保存することができる
# unnestを使って対象のデータを取り出すことが可能
out_le %>% filter(continent == "Europe" & year == 1977)%>%
unnest(cols = c(data))
## # A tibble: 30 x 6
## # Groups: continent, year [1]
## continent year country lifeExp pop gdpPercap
## <fct> <int> <fct> <dbl> <int> <dbl>
## 1 Europe 1977 Albania 68.9 2509048 3533.
## 2 Europe 1977 Austria 72.2 7568430 19749.
## 3 Europe 1977 Belgium 72.8 9821800 19118.
## 4 Europe 1977 Bosnia and Herzegovina 69.9 4086000 3528.
## 5 Europe 1977 Bulgaria 70.8 8797022 7612.
## 6 Europe 1977 Croatia 70.6 4318673 11305.
## 7 Europe 1977 Czech Republic 70.7 10161915 14800.
## 8 Europe 1977 Denmark 74.7 5088419 20423.
## 9 Europe 1977 Finland 72.5 4738902 15605.
## 10 Europe 1977 France 73.8 53165019 18293.
## # … with 20 more rows
# リスト列は,
# 1. 列内のオブジェクトに対してまとめて簡潔かつtidyな操作ができる
# まずfit_ols関数を作成し,対象のデータフレームに対して線形モデルを実行することが可能
fit_ols <- function(df){
lm(formula = lifeExp ~ log(gdpPercap),
data = df)
}
# fit_ols関数を,リスト列を含むそれぞれの行に順番にmapする
out_le <- gapminder %>%
group_by(continent, year) %>%
nest() %>%
mutate(model = map(data, fit_ols))
out_le %>% filter(continent == "Asia" & year == 1977) %>%
unnest(cols = c(data))
## # A tibble: 33 x 7
## # Groups: continent, year [1]
## continent year country lifeExp pop gdpPercap model
## <fct> <int> <fct> <dbl> <int> <dbl> <list>
## 1 Asia 1977 Afghanistan 38.4 14880372 786. <lm>
## 2 Asia 1977 Bahrain 65.6 297410 19340. <lm>
## 3 Asia 1977 Bangladesh 46.9 80428306 660. <lm>
## 4 Asia 1977 Cambodia 31.2 6978607 525. <lm>
## 5 Asia 1977 China 64.0 943455000 741. <lm>
## 6 Asia 1977 Hong Kong, China 73.6 4583700 11186. <lm>
## 7 Asia 1977 India 54.2 634000000 813. <lm>
## 8 Asia 1977 Indonesia 52.7 136725000 1383. <lm>
## 9 Asia 1977 Iran 57.7 35480679 11889. <lm>
## 10 Asia 1977 Iraq 60.4 11882916 14688. <lm>
## # … with 23 more rows
# 初めから一回で行う
# やることをまとめると
# 1. (大陸+年代)別に線形モデル作成する
# 2. それぞれのモデルから要約統計量を抽出する
# 3. この結果のネストを解除して,切片項とオセアニアデータを削除する(前者は便宜上,後者は国が少ないから)
fit_ols <- function(.df){
lm(formula = lifeExp ~ log(gdpPercap),
data = .df)
}
out_tidy <- gapminder %>%
group_by(continent, year) %>%
nest() %>%
mutate(model = map(data, fit_ols),
tidied = map(model, tidy)) %>%
unnest(tidied) %>%
select(!c(data, model)) %>% # data列とmodel列はいらない
filter(term %nin% "(Intercept)",
continent %nin% "Oceania")
out_tidy %>% as.data.frame() %>%
round_df() %>% slice_sample(n = 5, replace = TRUE)
## continent year term estimate std.error statistic p.value
## 1 Americas 2002 log(gdpPercap) 5.05 0.84 5.99 0
## 2 Europe 2002 log(gdpPercap) 3.74 0.45 8.40 0
## 3 Africa 1987 log(gdpPercap) 6.48 0.90 7.22 0
## 4 Europe 2002 log(gdpPercap) 3.74 0.45 8.40 0
## 5 Asia 1952 log(gdpPercap) 4.16 1.25 3.33 0
# 以上のコードにより,
# 大陸内で各年ごとに一人当たりの対数変換されたGDPと平均寿命との関係について解析した回帰分析の結果がtidyな形で得られた
# 得られたモデルの推定値はそれらのグループを確認し,図示する為に利用する
# 大陸ごとに層別した年区切りのGDPと平均寿命の関係における推定値
p <- ggplot(data = out_tidy,
mapping = aes(x = year, y = estimate,
ymin = estimate - 2*std.error,
ymax = estimate + 2*std.error,
group = continent, color = continent))
p + geom_pointrange(position = position_dodge(width = 1)) + # すこしずらす
scale_x_continuous(breaks = unique(gapminder$year)) + # 調査年に合わせる
theme(legend.position = "top") +
labs(x = "Year", y = "Estimate", color = "Continent")
モデルの偏効果・限界効果を推定して図示することが, モデルを正確に解釈し,有用な予測を示すための一般的な方法
限界効果: 説明変数が一単位ふえたときの目的変数の増加量
偏効果: 他の独立した説明変数の値が固定されている時に,説明変数が一単位増えた場合の目的変数の増加量
# 限界効果を可視化するパッケージ
library(margins)
これから行うこと 扱うデータセット - gss_sm: アメリカ合衆国の一般的な社会調査データ 目的変数 - obama: オバマに投票した場合に1, それ以外は0 説明変数 - age(年齢): 離散値 - polviews(リベラル・保守の傾向): Extremely Conservative ~ Extremely Liberal. Moderateが参照カテゴリ - race(人種): White, Black, Other. Whiteが参照カテゴリ - sex(性別): Male, Female. Maleが参照カテゴリ ※参照カテゴリ: 比較の基準となる変数. 方法 - ロジスティック回帰.人種と性別の間に交互作用があるとする
# まずpolviewsの参照カテゴリを変更する
# relevel()関数で可能
gss_sm$polviews_m <- relevel(gss_sm$polviews, ref = "Moderate")
out_bo <- glm(formula = obama ~ polviews_m + sex*race,
family = "binomial",
data = gss_sm)
summary(out_bo)
##
## Call:
## glm(formula = obama ~ polviews_m + sex * race, family = "binomial",
## data = gss_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.9045 -0.5541 0.1772 0.5418 2.2437
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.296493 0.134091 2.211 0.02703 *
## polviews_mExtremely Liberal 2.372950 0.525045 4.520 6.20e-06 ***
## polviews_mLiberal 2.600031 0.356666 7.290 3.10e-13 ***
## polviews_mSlightly Liberal 1.293172 0.248435 5.205 1.94e-07 ***
## polviews_mSlightly Conservative -1.355277 0.181291 -7.476 7.68e-14 ***
## polviews_mConservative -2.347463 0.200384 -11.715 < 2e-16 ***
## polviews_mExtremely Conservative -2.727384 0.387210 -7.044 1.87e-12 ***
## sexFemale 0.254866 0.145370 1.753 0.07956 .
## raceBlack 3.849526 0.501319 7.679 1.61e-14 ***
## raceOther -0.002143 0.435763 -0.005 0.99608
## sexFemale:raceBlack -0.197506 0.660066 -0.299 0.76477
## sexFemale:raceOther 1.574829 0.587657 2.680 0.00737 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2247.9 on 1697 degrees of freedom
## Residual deviance: 1345.9 on 1686 degrees of freedom
## (1169 observations deleted due to missingness)
## AIC: 1369.9
##
## Number of Fisher Scoring iterations: 6
# margins()関数を使ってそれぞれの変数の限界効果を計算する
bo_m <- margins(out_bo)
summary(bo_m)
## factor AME SE z p lower
## polviews_mConservative -0.4119 0.0283 -14.5394 0.0000 -0.4674
## polviews_mExtremely Conservative -0.4538 0.0420 -10.7971 0.0000 -0.5361
## polviews_mExtremely Liberal 0.2681 0.0295 9.0996 0.0000 0.2103
## polviews_mLiberal 0.2768 0.0229 12.0736 0.0000 0.2319
## polviews_mSlightly Conservative -0.2658 0.0330 -8.0596 0.0000 -0.3304
## polviews_mSlightly Liberal 0.1933 0.0303 6.3896 0.0000 0.1340
## raceBlack 0.4032 0.0173 23.3568 0.0000 0.3694
## raceOther 0.1247 0.0386 3.2297 0.0012 0.0490
## sexFemale 0.0443 0.0177 2.5073 0.0122 0.0097
## upper
## -0.3564
## -0.3714
## 0.3258
## 0.3218
## -0.2011
## 0.2526
## 0.4371
## 0.2005
## 0.0789
# marginsパッケージに独自の可視化メソッドがある
# ここではggplot2を用いて可視化する
# まずtibbleに変換する
bo_gg <- as_tibble(summary(bo_m))
bo_gg
## # A tibble: 9 x 7
## factor AME SE z p lower upper
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 polviews_mConservative -0.412 0.0283 -14.5 6.82e- 48 -0.467 -0.356
## 2 polviews_mExtremely Conserva… -0.454 0.0420 -10.8 3.55e- 27 -0.536 -0.371
## 3 polviews_mExtremely Liberal 0.268 0.0295 9.10 9.07e- 20 0.210 0.326
## 4 polviews_mLiberal 0.277 0.0229 12.1 1.46e- 33 0.232 0.322
## 5 polviews_mSlightly Conservat… -0.266 0.0330 -8.06 7.65e- 16 -0.330 -0.201
## 6 polviews_mSlightly Liberal 0.193 0.0303 6.39 1.66e- 10 0.134 0.253
## 7 raceBlack 0.403 0.0173 23.4 1.18e-120 0.369 0.437
## 8 raceOther 0.125 0.0386 3.23 1.24e- 3 0.0490 0.200
## 9 sexFemale 0.0443 0.0177 2.51 1.22e- 2 0.00967 0.0789
# factorのラベルを編集する
prefixes <- c("polviews_m", "sex")
bo_gg$factor <- prefix_strip(bo_gg$factor, prefixes)
bo_gg$factor <- prefix_replace(bo_gg$factor, "race", "Race: ")
bo_gg %>% select(factor, AME, lower, upper)
## # A tibble: 9 x 4
## factor AME lower upper
## <chr> <dbl> <dbl> <dbl>
## 1 Conservative -0.412 -0.467 -0.356
## 2 Extremely Conservative -0.454 -0.536 -0.371
## 3 Extremely Liberal 0.268 0.210 0.326
## 4 Liberal 0.277 0.232 0.322
## 5 Slightly Conservative -0.266 -0.330 -0.201
## 6 Slightly Liberal 0.193 0.134 0.253
## 7 Race: Black 0.403 0.369 0.437
## 8 Race: Other 0.125 0.0490 0.200
## 9 Female 0.0443 0.00967 0.0789
# 作図
# 平均限界効果の可視化
p <- ggplot(data = bo_gg,
mapping = aes(x = reorder(factor, AME),
y = AME,
ymin = lower, ymax = upper))
p + geom_hline(yintercept = 0, color = "gray80") +
geom_pointrange() +
coord_flip() +
labs(x = NULL, y = "Average Marginal Effect")
# 変数の条件付き効果を作図する場合
# 条件付き効果ってなんだ?
pv_cp <- cplot(out_bo, x = "sex", draw = FALSE)
pv_cp
## xvals yvals upper lower
## 1 Male 0.5735849 0.6378653 0.5093045
## 2 Female 0.6344507 0.6887845 0.5801169
p <- ggplot(data = pv_cp,
mapping = aes(x = reorder(xvals, yvals),
y = yvals,
ymin = lower, ymax = upper))
p + geom_hline(yintercept = 0, color = "gray80") +
geom_pointrange() +
coord_flip() +
labs(x = NULL, y = "Conditional Effect")
社会科学では,複雑な調査デザインの元,収集されたデータを解析する. 層別化,反復重み付け(対照グループと比較するため),クラスター構造をもったデータを扱うことも多い. これを扱う方法として,Thomas Lumleyが開発したsurveyパッケージおよびGerg Freedman Ellisが開発したsrvyrパッケージがある. srvyrパッケージはsurveyパッケージをtidyverseの文法(パイプライン)で書けるようにしたもの.
扱うデータ: gss_lon.
1972年にGSS(General Social Survey)が始まって以来のGSSの様々な調査値の変動に関するサブセットが含まれている 行うこと: 1976~2016年までのそれぞれの調査年における,人種ごとの重み付き教育歴の分布を推定する
# パッケージの読み込み
library(survey)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(srvyr)
##
## Attaching package: 'srvyr'
## The following object is masked from 'package:stats':
##
## filter
# データの確認
glimpse(gss_lon)
## Rows: 62,466
## Columns: 25
## $ year <dbl> 1972, 1972, 1972, 1972, 1972, 1972, 1972, 1972, 1972, 19…
## $ id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ ballot <labelled> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ age <labelled> 23, 70, 48, 27, 61, 26, 28, 27, 21, 30, 30, 56, 54,…
## $ degree <fct> Bachelor, Lt High School, High School, Bachelor, High Sc…
## $ race <fct> White, White, White, White, White, White, White, White, …
## $ sex <fct> Female, Male, Female, Female, Female, Male, Male, Male, …
## $ siblings <fct> 3, 4, 5, 5, 2, 1, 6+, 1, 2, 6+, 6+, 6+, 2, 2, 0, 6+, 0, …
## $ kids <fct> 0, 4+, 4+, 0, 2, 0, 2, 0, 2, 4+, 1, 4+, 1, 2, 4+, 2, 2, …
## $ bigregion <fct> Midwest, Midwest, Midwest, Midwest, Midwest, Midwest, Mi…
## $ income16 <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ religion <fct> Jewish, Catholic, Protestant, Other, Protestant, Protest…
## $ marital <fct> Never Married, Married, Married, Married, Married, Never…
## $ padeg <fct> Lt High School, Lt High School, Lt High School, Bachelor…
## $ madeg <fct> NA, Lt High School, Lt High School, High School, Lt High…
## $ partyid <fct> "Ind,near Dem", "Not Str Democrat", "Independent", "Not …
## $ polviews <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ happy <fct> Not Too Happy, Not Too Happy, Pretty Happy, Not Too Happ…
## $ partners_rc <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ grass <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ zodiac <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ pres12 <labelled> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ wtssall <dbl> 0.4446, 0.8893, 0.8893, 0.8893, 0.8893, 0.4446, 0.4446, …
## $ vpsu <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ vstrat <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
options(survey.lonely.psu = "adjust")
options(na.action = "na.pass")
gss_wt <- subset(gss_lon, year > 1974) %>%
mutate(stratvar = interaction(year, vstrat)) %>%
as_survey_design(ids = vpsu,
strata = stratvar,
weights = wtssall,
nest = TRUE)
# stratvar列: 階層構造の情報である年ごとのサンプリング層の情報
# interaction()関数を使って,yearとvstrat変数を組み合わせた,それぞれの年についての階層情報ベクトル
# 出力として,「1976.7001」のようなyear.vstratの形になる
# as_survey_design()関数を使って,調査デザインに関する情報を追加する
# サンプリングID,層(strata),重み付け(weight)に関する情報を追加する
# survey_mean()関数を使って,1976~2016年それぞれの年における人種ごとの教育歴の分布を算出する
out_grp <- gss_wt %>%
filter(year %in% seq(1976, 2016, by = 4)) %>%
group_by(year, race, degree) %>%
summarize(prop = survey_mean(na.rm = TRUE))
out_grp
## # A tibble: 162 x 5
## # Groups: year, race [30]
## year race degree prop prop_se
## <dbl> <fct> <fct> <dbl> <dbl>
## 1 1976 White Lt High School 0.327 0.0160
## 2 1976 White High School 0.517 0.0161
## 3 1976 White Junior College 0.0128 0.00298
## 4 1976 White Bachelor 0.101 0.00955
## 5 1976 White Graduate 0.0392 0.00642
## 6 1976 White <NA> 0.00285 0.00151
## 7 1976 Black Lt High School 0.558 0.0603
## 8 1976 Black High School 0.335 0.0476
## 9 1976 Black Junior College 0.0423 0.0192
## 10 1976 Black Bachelor 0.0577 0.0238
## # … with 152 more rows
# 確認
out_grp %>% group_by(year, race) %>%
summarize(sum = sum(prop))
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
## # A tibble: 30 x 3
## # Groups: year [10]
## year race sum
## <dbl> <fct> <dbl>
## 1 1976 White 1
## 2 1976 Black 1
## 3 1976 Other 1.00
## 4 1980 White 1
## 5 1980 Black 1
## 6 1980 Other 1
## 7 1984 White 1
## 8 1984 Black 1
## 9 1984 Other 1
## 10 1988 White 1
## # … with 20 more rows
# 度数の比率は各年の人種ごとに合計されて1になる
# 各年の人種と教育歴の全ての組み合わせの合計で1にしたい場合は
# interaction()関数を使って交互作用変数を考えると良い
out_mrg <- gss_wt %>%
filter(year %in% seq(1976, 2016, by = 4)) %>%
mutate(racedeg = interaction(race, degree)) %>%
group_by(year, racedeg) %>%
summarize(prop = survey_mean(na.rm = TRUE))
out_mrg
## # A tibble: 155 x 4
## # Groups: year [10]
## year racedeg prop prop_se
## <dbl> <fct> <dbl> <dbl>
## 1 1976 White.Lt High School 0.297 0.0146
## 2 1976 Black.Lt High School 0.0470 0.00837
## 3 1976 Other.Lt High School 0.00194 0.00138
## 4 1976 White.High School 0.469 0.0159
## 5 1976 Black.High School 0.0282 0.00593
## 6 1976 Other.High School 0.00324 0.00166
## 7 1976 White.Junior College 0.0117 0.00268
## 8 1976 Black.Junior College 0.00356 0.00162
## 9 1976 White.Bachelor 0.0916 0.00883
## 10 1976 Black.Bachelor 0.00486 0.00213
## # … with 145 more rows
# 確認
out_mrg %>% group_by(year) %>%
summarise(total = sum(prop))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 10 x 2
## year total
## <dbl> <dbl>
## 1 1976 1
## 2 1980 1
## 3 1984 1
## 4 1988 1.
## 5 1996 1
## 6 2000 1.00
## 7 2004 1
## 8 2008 1.00
## 9 2012 1
## 10 2016 1.00
# race.degreeのような形で変数を扱いたくない場合も
# raceとdegreeをそれぞれ別の列で扱いたい
# separate()関数を使うことで対象列の名前を二つの列に分割できる
out_mrg <- gss_wt %>%
filter(year %in% seq(1976, 2016, by = 4)) %>%
mutate(racedeg = interaction(race, degree)) %>%
group_by(year, racedeg) %>%
summarize(prop = survey_mean(na.rm = TRUE)) %>%
separate(racedeg, sep = "\\.", into = c("race", "degree"))
out_mrg
## # A tibble: 155 x 5
## # Groups: year [10]
## year race degree prop prop_se
## <dbl> <chr> <chr> <dbl> <dbl>
## 1 1976 White Lt High School 0.297 0.0146
## 2 1976 Black Lt High School 0.0470 0.00837
## 3 1976 Other Lt High School 0.00194 0.00138
## 4 1976 White High School 0.469 0.0159
## 5 1976 Black High School 0.0282 0.00593
## 6 1976 Other High School 0.00324 0.00166
## 7 1976 White Junior College 0.0117 0.00268
## 8 1976 Black Junior College 0.00356 0.00162
## 9 1976 White Bachelor 0.0916 0.00883
## 10 1976 Black Bachelor 0.00486 0.00213
## # … with 145 more rows
年別に層別化した場合,どのグラフを使うと良いのかは専門家でも意見が分かれる 単年度なら棒グラフでも良いが,長期間ののデータなら折れ線グラフを使うのも良い
# GSSの結果をダイナマイトプロット(棒グラフ±SE)で図示する
p <- ggplot(data = subset(out_grp, race %nin% "Other"),
mapping = aes(x = degree,
y = prop,
ymin = prop - 2*prop_se,
ymax = prop + 2*prop_se,
fill = race,
color = race,
group = race))
dodge <- position_dodge(width = 0.9)
p + geom_col(position = dodge, alpha = 0.2) +
geom_errorbar(position = dodge, width = 0.2) +
scale_x_discrete(labels = scales::wrap_format(10)) + # 長いラベルを行単位に分割する
scale_y_continuous(labels = scales::percent) +
scale_color_brewer(type = "qual", palette = "Dark2") +
scale_fill_brewer(type = "qual", palette = "Dark2") +
labs(x = NULL, y = "Percent",
title = "Educational Attainment by Race",
fill = "Race", color = "Race") +
facet_wrap(~ year, ncol = 2) +
theme(legend.position = "top")
# NAが残ったり,グラフの挙動なんかおかしい?
各年内の内訳を見るには簡単. しかしながら年の経過で比較するのは非常に難しい
次に,教育歴をfacetし,x軸を年にする
data <- out_grp %>%
subset(race %nin% "Other") %>%
subset(degree %nin% NA)
head(data)
## # A tibble: 6 x 5
## # Groups: year, race [2]
## year race degree prop prop_se
## <dbl> <fct> <fct> <dbl> <dbl>
## 1 1976 White Lt High School 0.327 0.0160
## 2 1976 White High School 0.517 0.0161
## 3 1976 White Junior College 0.0128 0.00298
## 4 1976 White Bachelor 0.101 0.00955
## 5 1976 White Graduate 0.0392 0.00642
## 6 1976 Black Lt High School 0.558 0.0603
head(out_grp)
## # A tibble: 6 x 5
## # Groups: year, race [1]
## year race degree prop prop_se
## <dbl> <fct> <fct> <dbl> <dbl>
## 1 1976 White Lt High School 0.327 0.0160
## 2 1976 White High School 0.517 0.0161
## 3 1976 White Junior College 0.0128 0.00298
## 4 1976 White Bachelor 0.101 0.00955
## 5 1976 White Graduate 0.0392 0.00642
## 6 1976 White <NA> 0.00285 0.00151
p <- ggplot(data = data,
mapping = aes(x = year, y = prop,
ymin = prop - 2*prop_se,
ymax = prop + 2*prop_se,
color = race,
fill = race,
group = race))
p + geom_ribbon(alpha = 0.3, aes(color = NULL)) +
geom_line() +
facet_wrap(~ degree, ncol = 1) +
scale_y_continuous(labels = scales::percent) +
labs(x = NULL, y = "Percent",
title = "Educational Attainment \nby Race",
subtitle = "GSS 1976-2016",
fill = "Race", color = "Race") +
theme(legend.position = "top")
# graduateがの1976年のデータがない?
モデルを推定し,その結果を可視化する時に難しいのは,モデルから正しく数値を計算・抽出すること
そのためにもモデルの理解や関数の中身を理解する必要がある
glimpse(gapminder)
## Rows: 1,704
## Columns: 6
## $ country <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Afghan…
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia…
## $ year <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997…
## $ lifeExp <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40…
## $ pop <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, …
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134…
out <- lm(formula = lifeExp ~ log(gdpPercap) + pop + continent,
data = gapminder)
summary(out)
##
## Call:
## lm(formula = lifeExp ~ log(gdpPercap) + pop + continent, data = gapminder)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.7090 -3.4832 0.4396 4.4062 18.6914
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.476e+00 1.352e+00 1.091 0.275
## log(gdpPercap) 6.524e+00 1.824e-01 35.778 < 2e-16 ***
## pop 9.990e-09 1.650e-09 6.056 1.72e-09 ***
## continentAmericas 6.729e+00 5.507e-01 12.218 < 2e-16 ***
## continentAsia 5.157e+00 4.880e-01 10.567 < 2e-16 ***
## continentEurope 9.290e+00 5.997e-01 15.491 < 2e-16 ***
## continentOceania 8.965e+00 1.521e+00 5.896 4.49e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.965 on 1697 degrees of freedom
## Multiple R-squared: 0.7102, Adjusted R-squared: 0.7092
## F-statistic: 693.3 on 6 and 1697 DF, p-value: < 2.2e-16
plot(out, which = c(1, 2), ask = FALSE) # wichiで最初の1, 2のグラフを出力すると指定した
coefplotパッケージを使う
library(coefplot)
out <- lm(formula = lifeExp ~ log(gdpPercap) + log(pop) + continent,
data = gapminder)
coefplot(out, sort = "magnitude", intercept = FALSE)
inferパッケージ - 開発の初期段階だが,すでに有用なものがある
GGallyパッケージ - 複雑な図の作成を簡略化するためのパッケージ - あくまでも研究者がデータセットの内容を迅速に確認するため - さらなる調査に向けて探索的にデータを読み解いていくのが目的
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
organdata_sm <- organdata %>%
select(donors, pop_dens, pubhealth,
roads, consent_law)
?ggpairs
# upperやlowerは
ggpairs(data = organdata_sm,
mapping = aes(color = consent_law),
upper = list(continuous = wrap("density"), combo = "box_no_facet"),
lower = list(continuous = wrap("points"), combo = wrap("dot_no_facet")))
## Warning: Removed 34 rows containing non-finite values (stat_density).
## Warning: Removed 34 rows containing non-finite values (stat_density2d).
## Warning: Removed 37 rows containing non-finite values (stat_density2d).
## Warning: Removed 34 rows containing non-finite values (stat_density2d).
## Warning: Removed 34 rows containing non-finite values (stat_boxplot).
## Warning: Removed 34 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing non-finite values (stat_density).
## Warning: Removed 21 rows containing non-finite values (stat_density2d).
## Warning: Removed 17 rows containing non-finite values (stat_density2d).
## Warning: Removed 17 rows containing non-finite values (stat_boxplot).
## Warning: Removed 37 rows containing missing values (geom_point).
## Warning: Removed 21 rows containing missing values (geom_point).
## Warning: Removed 21 rows containing non-finite values (stat_density).
## Warning: Removed 21 rows containing non-finite values (stat_density2d).
## Warning: Removed 21 rows containing non-finite values (stat_boxplot).
## Warning: Removed 34 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing missing values (geom_point).
## Warning: Removed 21 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing non-finite values (stat_density).
## Warning: Removed 17 rows containing non-finite values (stat_boxplot).
## Warning: Removed 34 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing missing values (geom_point).
## Warning: Removed 21 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing missing values (geom_point).
ggpairs(data = organdata_sm,
mapping = aes(color = consent_law),
upper = list(continuous = wrap("density", combo = "box_no_facet")))
## Warning: Removed 34 rows containing non-finite values (stat_density).
## Warning: Ignoring unknown parameters: combo
## Warning: Removed 34 rows containing non-finite values (stat_density2d).
## Warning: Ignoring unknown parameters: combo
## Warning: Removed 37 rows containing non-finite values (stat_density2d).
## Warning: Ignoring unknown parameters: combo
## Warning: Removed 34 rows containing non-finite values (stat_density2d).
## Warning: Removed 34 rows containing non-finite values (stat_boxplot).
## Warning: Removed 34 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing non-finite values (stat_density).
## Warning: Ignoring unknown parameters: combo
## Warning: Removed 21 rows containing non-finite values (stat_density2d).
## Warning: Ignoring unknown parameters: combo
## Warning: Removed 17 rows containing non-finite values (stat_density2d).
## Warning: Removed 17 rows containing non-finite values (stat_boxplot).
## Warning: Removed 37 rows containing missing values (geom_point).
## Warning: Removed 21 rows containing missing values (geom_point).
## Warning: Removed 21 rows containing non-finite values (stat_density).
## Warning: Ignoring unknown parameters: combo
## Warning: Removed 21 rows containing non-finite values (stat_density2d).
## Warning: Removed 21 rows containing non-finite values (stat_boxplot).
## Warning: Removed 34 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing missing values (geom_point).
## Warning: Removed 21 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing non-finite values (stat_density).
## Warning: Removed 17 rows containing non-finite values (stat_boxplot).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 34 rows containing non-finite values (stat_bin).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 17 rows containing non-finite values (stat_bin).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 21 rows containing non-finite values (stat_bin).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 17 rows containing non-finite values (stat_bin).